Out of 30 samples, we selected 17 for this study. These are the normal tissue samples from the control, the UVB and the UVB+SFN treatment groups. Normal tissue samples from the UVB_UA groups as well as tumor samples were excluded from this analysis. Additionally, one of the control samples at Week 2 (baseline) was removed after outlier analysis.
7,219 genes with zero counts in > 80% (> 13 out of 17) of samples were removed. 17,202 out of 24,421 genes were left.
[1] 7219
[1] 17202
Next, we normalized the counts. To convert the number of hits to the relative abundance of genes in each sample, we used transcripts per kilobase million (TPM) normalization, which is computed as follows for the j-th sample:
1. Normalize for gene length: a[i, j] = 1,000*count[i, j]/length[i], where length[i] is the length of the i-th gene in bp
2. Normalize for sequencing depth (i.e. total count): a[i, j]/sum(a[, j])
3. Multiply by one million
A very good comparison of normalization techniques can be found in the following video:
RPKM, FPKM and TPM, clearly explained
After the normalization, each sample’s total is 1M:
02w_CON_0 02w_SFN_0 02w_SFN_1 02w_UVB_0 02w_UVB_1 15w_CON_0 15w_CON_1 15w_SFN_0
1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06
15w_SFN_1 15w_UVB_0 15w_UVB_1 25w_CON_0 25w_CON_1 25w_SFN_0 25w_SFN_1 25w_UVB_0
1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06
25w_UVB_1
1e+06
# Separate the 100 genes at the end of the Geneid level order
# NOTE(review): these are the "top 100 abundant" genes only if the Geneid
# levels were sorted by increasing abundance upstream - confirm
tmp <- droplevels(tpm[Geneid %in% tail(levels(tpm$Geneid), 100)])

# Long format: one row per gene and sample (the first two columns are IDs)
tmp <- melt.data.table(data = tmp,
                       id.vars = 1:2,
                       measure.vars = 3:ncol(tmp),
                       variable.name = "Sample",
                       value.name = "TPM")

# Sample names look like "02w_CON_0": characters 1-3 hold the week,
# 5-7 the treatment and 9 the replica
tmp$Week <- substr(x = tmp$Sample,
                   start = 1,
                   stop = 3)
tmp$Week <- factor(tmp$Week,
                   levels = unique(tmp$Week))
tmp$Treatment <- substr(x = tmp$Sample,
                        start = 5,
                        stop = 7)
tmp$Treatment <- factor(tmp$Treatment,
                        levels = c("CON",
                                   "UVB",
                                   "SFN"))
tmp$Replica <- substr(x = tmp$Sample,
                      start = 9,
                      stop = 9)
tmp$Replica <- factor(tmp$Replica,
                      levels = 0:1)

# Plot the selected genes.
# Treatment is mapped to 'color' rather than 'fill': the default point shapes
# used for the 'shape' aesthetic have no fill, so a fill mapping is not drawn
p2 <- ggplot(tmp,
             aes(x = TPM,
                 y = Geneid,
                 color = Treatment,
                 shape = Week)) +
  geom_point(size = 3,
             alpha = 0.5) +
  # Dashed reference line at TPM = 1
  geom_vline(xintercept = 1,
             linetype = "dashed")
ggplotly(p2)
# Separate the first 100 genes in the Geneid level order
# NOTE(review): presumably the 100 least abundant genes, i.e. the opposite end
# of the level order from the previous plot - confirm how the levels are sorted
tmp <- droplevels(tpm[Geneid %in% head(levels(tpm$Geneid), 100)])

# Long format: one row per gene and sample (the first two columns are IDs)
tmp <- melt.data.table(data = tmp,
                       id.vars = 1:2,
                       measure.vars = 3:ncol(tmp),
                       variable.name = "Sample",
                       value.name = "TPM")

# Sample names look like "02w_CON_0": characters 1-3 hold the week,
# 5-7 the treatment and 9 the replica
tmp$Week <- substr(x = tmp$Sample,
                   start = 1,
                   stop = 3)
tmp$Week <- factor(tmp$Week,
                   levels = unique(tmp$Week))
tmp$Treatment <- substr(x = tmp$Sample,
                        start = 5,
                        stop = 7)
tmp$Treatment <- factor(tmp$Treatment,
                        levels = c("CON",
                                   "UVB",
                                   "SFN"))
tmp$Replica <- substr(x = tmp$Sample,
                      start = 9,
                      stop = 9)
tmp$Replica <- factor(tmp$Replica,
                      levels = 0:1)

# Plot the first 100 genes (not the top 100 abundant ones).
# Treatment is mapped to 'color' rather than 'fill': the default point shapes
# used for the 'shape' aesthetic have no fill, so a fill mapping is not drawn
p3 <- ggplot(tmp,
             aes(x = TPM,
                 y = Geneid,
                 color = Treatment,
                 shape = Week)) +
  geom_point(size = 3,
             alpha = 0.5) +
  # Dashed reference line at TPM = 1
  geom_vline(xintercept = 1,
             linetype = "dashed")
ggplotly(p3)
# Sample annotation table: one row per sample column of 'dt1' (every column
# except the first two). Sample names look like "02w_CON_0": characters 1-3
# hold the week, 5-7 the treatment and 9 the replica.
dmeta <- data.table(Sample = colnames(dt1)[-c(1:2)])

# Week: short code ('time') and display label ('Week')
dmeta[, time := factor(substr(Sample, start = 1, stop = 3),
                       levels = c("02w", "15w", "25w"))]
dmeta[, Week := factor(time,
                       levels = c("02w", "15w", "25w"),
                       labels = c("Week 2", "Week 15", "Week 25"))]

# Treatment: short code ('trt') and display label ('Treatment')
dmeta[, trt := factor(substr(Sample, start = 5, stop = 7),
                      levels = c("CON", "UVB", "SFN"))]
dmeta[, Treatment := factor(trt,
                            levels = c("CON", "UVB", "SFN"),
                            labels = c("Negative Control",
                                       "Positive Control (UVB)",
                                       "Sulforaphane (SFN)"))]

# Replica index within each week/treatment group
dmeta[, Replica := factor(substr(Sample, start = 9, stop = 9),
                          levels = 0:1)]

# Show the full table on a single page
datatable(dmeta,
          options = list(pageLength = nrow(dmeta)))
NOTE: the distributions are skewed. To make them symmetric, a log transformation is often applied. However, zeros are a problem because log(0) is undefined. In this instance, we added a small value lambda[i], equal to 1/10 of the smallest non-zero value of the i-th gene, to all values of that gene before taking the log.
# TPM matrix with genes in rows and samples in columns
# (the first two columns of 'tpm' are dropped as non-sample columns)
dm.tpm <- as.matrix(tpm[, -c(1:2), with = FALSE])
rownames(dm.tpm) <- tpm$Geneid

# Optional step, kept disabled:
# # Remove 02w_CON_1 sample and redo PCA
# dm.tpm <- dm.tpm[, colnames(dm.tpm) != "02w_CON_1"]
# dmeta <- dmeta[dmeta$Sample != "02w_CON_1", ]

# Per-gene log transform: shift each gene by 1/10 of its smallest non-zero
# value so that zeros stay finite. apply() over rows returns one column per
# gene, hence the transpose back to genes x samples.
dm.ltpm <- t(apply(X = dm.tpm,
                   MARGIN = 1,
                   FUN = function(gene_tpm) {
                     offset <- min(gene_tpm[gene_tpm > 0])/10
                     log(gene_tpm + offset)
                   }))

# PCA----
# Samples are the observations (rows after transposing); genes are centered
# and scaled to unit variance
m1 <- prcomp(x = t(dm.ltpm),
             center = TRUE,
             scale. = TRUE)
s1 <- summary(m1)
s1
Importance of components:
PC1 PC2 PC3 PC4 PC5 PC6 PC7
Standard deviation 66.5041 61.8206 45.2845 30.42909 28.24422 26.84136 25.01865
Proportion of Variance 0.2571 0.2222 0.1192 0.05383 0.04637 0.04188 0.03639
Cumulative Proportion 0.2571 0.4793 0.5985 0.65232 0.69869 0.74058 0.77696
PC8 PC9 PC10 PC11 PC12 PC13 PC14
Standard deviation 23.05989 22.08373 21.24391 20.87624 20.6980 20.28169 19.42403
Proportion of Variance 0.03091 0.02835 0.02624 0.02534 0.0249 0.02391 0.02193
Cumulative Proportion 0.80788 0.83623 0.86246 0.88780 0.9127 0.93662 0.95855
PC15 PC16 PC17
Standard deviation 19.14803 18.61200 2.085e-13
Proportion of Variance 0.02131 0.02014 0.000e+00
Cumulative Proportion 0.97986 1.00000 1.000e+00
# Percent variance explained per component, taken from the PCA summary
# (row 2 = proportion of variance, row 3 = cumulative proportion)
imp <- data.table(PC = colnames(s1$importance),
                  Variance = 100*s1$importance[2, ],
                  Cumulative = 100*s1$importance[3, ])
# Keep the components in their original order on the x axis
imp$PC <- factor(imp$PC,
                 levels = imp$PC)

# Pareto chart: bars show per-component variance (left axis, 0-30%);
# the line and points show cumulative variance rescaled onto the same 0-30
# range and labelled 0-100% on the right axis
p1 <- ggplot(imp,
             aes(x = PC,
                 y = Variance)) +
  geom_col(fill = "grey",
           color = "black") +
  geom_line(aes(y = rescale(Cumulative,
                            to = c(min(Cumulative)*30/100,
                                   30)),
                group = 1)) +
  geom_point(aes(y = rescale(Cumulative,
                             to = c(min(Cumulative)*30/100,
                                    30)))) +
  scale_y_continuous("% Variance Explained",
                     breaks = seq(0, 30, by = 5),
                     labels = paste0(seq(0, 30, by = 5),
                                     "%"),
                     sec.axis = sec_axis(trans = ~.,
                                         name = "% Cumulative Variance",
                                         breaks = seq(0, 30, length.out = 5),
                                         labels = paste0(seq(0, 100, length.out = 5),
                                                         "%"))) +
  scale_x_discrete("") +
  theme(axis.text.x = element_text(angle = 90,
                                   hjust = 1))
print(p1)

# Save for publication
tiff(filename = "tmp/pca_pareto.tiff",
     height = 6,
     width = 8,
     units = 'in',
     res = 600,
     compression = "lzw+p")
print(p1)
graphics.off()
# PCA score plots (adapted from a biplot that keeps only the most important variables; Javier)----
# Select the PCs to plot (PC1, PC2 and PC3)
choices <- c(1:3)
# Scores, i.e. points (df.u): one row per sample
dt.scr <- data.table(m1$x[, choices])
# Add grouping variables
# NOTE(review): assumes the rows of 'dmeta' are in the same order as the
# samples passed to prcomp() - confirm
dt.scr$trt <- dmeta$trt
dt.scr$time <- dmeta$time
dt.scr$sample <- dmeta$Sample
# Loadings, i.e. arrows (df.v): one row per gene.
# Only the column names of 'dt.rot' are used below (for the axis labels)
dt.rot <- as.data.frame(m1$rotation[, choices])
dt.rot$feat <- rownames(dt.rot)
dt.rot <- data.table(dt.rot)
# Axis labels: PC name plus the percent of total variance it explains
u.axis.labs <- paste(colnames(dt.rot)[choices],
sprintf('(%0.1f%% explained var.)',
100*m1$sdev[choices]^2/sum(m1$sdev^2)))
# PC1 vs. PC2 (legend hidden; a shared legend is built separately below)
p1 <- ggplot(data = dt.scr,
aes(x = PC1,
y = PC2,
color = trt,
shape = time)) +
geom_point(size = 4,
alpha = 0.5) +
scale_x_continuous(u.axis.labs[1]) +
scale_y_continuous(u.axis.labs[2]) +
theme(legend.position = "none")
ggplotly(p1)
# PC1 vs. PC3
p2 <- ggplot(data = dt.scr,
aes(x = PC1,
y = PC3,
color = trt,
shape = time)) +
geom_point(size = 4,
alpha = 0.5) +
scale_x_continuous(u.axis.labs[1]) +
scale_y_continuous(u.axis.labs[3]) +
theme(legend.position = "none")
ggplotly(p2)
# PC2 vs. PC3
p3 <- ggplot(data = dt.scr,
aes(x = PC2,
y = PC3,
color = trt,
shape = time)) +
geom_point(size = 4,
alpha = 0.5) +
scale_x_continuous(u.axis.labs[2]) +
scale_y_continuous(u.axis.labs[3]) +
theme(legend.position = "none")
ggplotly(p3)
# Legend only: build a throwaway plot with the same mappings and extract
# its legend as a separate panel
tmp <- ggplot(data = dt.scr,
aes(x = PC1,
y = PC2,
color = trt,
shape = time)) +
geom_point() +
scale_color_discrete("Treatment") +
scale_shape_discrete("Week")
p4 <- as_ggplot(get_legend(tmp))
# Save for publication: three score plots plus the legend in a 2-row grid
tiff(filename = "tmp/pca.tiff",
height = 7,
width = 9,
units = 'in',
res = 600,
compression = "lzw+p")
grid.arrange(p1, p2, p3, p4,
nrow = 2)
graphics.off()
# Interactive 3D view of the first three PCs; the treatment factor codes are
# passed as colors and the sample names as point labels
scatterplot3js(x = dt.scr$PC1,
y = dt.scr$PC2,
z = dt.scr$PC3,
color = as.numeric(dt.scr$trt),
renderer = "auto",
pch = dt.scr$sample,
size = 0.1)
Sources:
1. Analyzing RNA-seq data with DESeq2:Interactions
2. Bioconductor Question: DESeq2 time series analysis
We are testing a model with a time*treatment interaction. The idea here is to find genes with a significant interaction term. That would suggest that the gene expression differences between the treatments depended on time. There are several possible scenarios:
a. No difference between the negative control and the positive control groups at baseline, significant difference at the later time point. This will show the effect of the disease (UVB radiation, in this case).
b. Significant difference between the control groups at baseline, no difference at the later time point. Same as (a) above.
c. Differences between the positive control and the SFN-treated groups. Here, we are interested in the reversal of UVB effect. Again, the interaction term will need to be significant for the reasons described above.
# Relevel: UVB (positive control) becomes the reference level, so every
# treatment coefficient is a comparison against UVB
dmeta$trt <- factor(dmeta$trt,
                    levels = c("UVB",
                               "CON",
                               "SFN"))

# Count matrix: genes in rows, samples in columns (in 'dmeta' row order)
dtm <- as.matrix(dt1[, .SD,
                     .SDcols = dmeta$Sample])
rownames(dtm) <- dt1$Geneid

# Full model: main effects of time and treatment plus their interaction
dds <- DESeqDataSetFromMatrix(countData = dtm,
                              colData = dmeta,
                              design = ~ time + trt + time:trt)

# The default size factor estimator (type = "ratio") needs geometric means,
# which cannot be estimated when zeros prevent their calculation; switch to
# type = "poscounts". Type '?DESeq2::estimateSizeFactors' for more details.
dds <- estimateSizeFactors(object = dds,
                           type = "poscounts")

# Run DESeq----
# Default test; a likelihood ratio test against the reduced model
# ~ time + trt (test = "LRT", reduced = ~ time + trt) was considered but is
# not used here.
# Size factors already exist at this point, so DESeq() reuses them (it reports
# "using pre-existing size factors") and 'sfType' does not affect the fit
dds <- DESeq(object = dds,
             fitType = "local",
             sfType = "ratio",
             parallel = FALSE)
using pre-existing size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
# NOTE (from the DESeq help file, section Value):
# A DESeqDataSet object with results stored as metadata columns.
# These results should be accessed by calling the results function.
# By default this will return the log2 fold changes and p-values
# for the last variable in the design formula.
# See results for how to access results for other variables.
# In this case, the last term is the interaction term time:trt
# NOTE:
# The likelihood ratio test (LRT) (chi-squared test) for GLM will only return
# the results for the difference between the full and the reduced model
# List the names of the fitted coefficients
resultsNames(dds)
[1] "Intercept" "time_15w_vs_02w" "time_25w_vs_02w" "trt_CON_vs_UVB"
[5] "trt_SFN_vs_UVB" "time15w.trtCON" "time25w.trtCON" "time15w.trtSFN"
[9] "time25w.trtSFN"
# Model matrix for the same formula as the DESeq2 design, built from the
# sample annotation; printed to show which samples each coefficient covers
mm1 <- model.matrix(object = ~ time + trt + time:trt,
                    data = dmeta)
mm1
(Intercept) time15w time25w trtCON trtSFN time15w:trtCON time25w:trtCON
1 1 0 0 1 0 0 0
2 1 0 0 0 1 0 0
3 1 0 0 0 1 0 0
4 1 0 0 0 0 0 0
5 1 0 0 0 0 0 0
6 1 1 0 1 0 1 0
7 1 1 0 1 0 1 0
8 1 1 0 0 1 0 0
9 1 1 0 0 1 0 0
10 1 1 0 0 0 0 0
11 1 1 0 0 0 0 0
12 1 0 1 1 0 0 1
13 1 0 1 1 0 0 1
14 1 0 1 0 1 0 0
15 1 0 1 0 1 0 0
16 1 0 1 0 0 0 0
17 1 0 1 0 0 0 0
time15w:trtSFN time25w:trtSFN
1 0 0
2 0 0
3 0 0
4 0 0
5 0 0
6 0 0
7 0 0
8 1 0
9 1 0
10 0 0
11 0 0
12 0 0
13 0 0
14 0 1
15 0 1
16 0 0
17 0 0
attr(,"assign")
[1] 0 1 1 2 2 3 3 3 3
attr(,"contrasts")
attr(,"contrasts")$time
[1] "contr.treatment"
attr(,"contrasts")$trt
[1] "contr.treatment"
# Control vs. UVB at the reference time point (Week 2).
# The numeric contrast puts a 1 on the "trt_CON_vs_UVB" coefficient
# (4th entry of resultsNames(dds)) and 0 on all the others
res_con_uvb_week2 <- results(dds,
                             contrast = as.numeric(resultsNames(dds) == "trt_CON_vs_UVB"),
                             alpha = 0.1)

# Sort genes by increasing adjusted p-value (NAs go last)
res_con_uvb_week2 <- res_con_uvb_week2[order(res_con_uvb_week2$padj), ]
summary(res_con_uvb_week2)
out of 17202 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up) : 1546, 9%
LFC < 0 (down) : 1537, 8.9%
outliers [1] : 0, 0%
low counts [2] : 2335, 14%
(mean count < 2)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results
# How many adjusted p-values were less than 0.1?
# (genes with NA adjusted p-values are not counted)
sum(res_con_uvb_week2$padj < 0.1,
na.rm = TRUE)
[1] 3083
# MA plot
# In DESeq2's plotMA(), 'alpha' is the adjusted p-value threshold used to
# highlight genes, not a transparency. Use the same 0.1 cutoff that was
# passed to results() for this comparison.
plotMA(res_con_uvb_week2,
       main = "Control vs. UVB at Week 2",
       alpha = 0.1)

# Save for publication
tiff(filename = "tmp/ma_w2_con_uvb.tiff",
     height = 6,
     width = 7,
     units = 'in',
     res = 600,
     compression = "lzw+p")
plotMA(res_con_uvb_week2,
       main = "Control vs. UVB at Week 2",
       alpha = 0.1)
graphics.off()
# UVB+SFN vs. UVB at the reference time point (Week 2).
# The numeric contrast puts a 1 on the "trt_SFN_vs_UVB" coefficient
# (5th entry of resultsNames(dds)) and 0 on all the others
res_sfn_uvb_week2 <- results(dds,
                             contrast = as.numeric(resultsNames(dds) == "trt_SFN_vs_UVB"),
                             alpha = 0.1)

# Sort genes by increasing adjusted p-value (NAs go last)
res_sfn_uvb_week2 <- res_sfn_uvb_week2[order(res_sfn_uvb_week2$padj), ]
summary(res_sfn_uvb_week2)
out of 17202 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up) : 26, 0.15%
LFC < 0 (down) : 35, 0.2%
outliers [1] : 0, 0%
low counts [2] : 3669, 21%
(mean count < 5)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results
# How many adjusted p-values were less than 0.1?
# (genes with NA adjusted p-values are not counted)
sum(res_sfn_uvb_week2$padj < 0.1,
na.rm = TRUE)
[1] 61
# MA plot
# In DESeq2's plotMA(), 'alpha' is the adjusted p-value threshold used to
# highlight genes, not a transparency. Use the same 0.1 cutoff that was
# passed to results() for this comparison.
# plotMA() draws as a side effect; wrapping it in print() only printed its
# NULL return value, so it is called directly.
plotMA(res_sfn_uvb_week2,
       main = "UVB+SFN vs UVB at Week 2",
       alpha = 0.1)

# Save for publication
tiff(filename = "tmp/ma_w2_sfn_uvb.tiff",
     height = 6,
     width = 7,
     units = 'in',
     res = 600,
     compression = "lzw+p")
plotMA(res_sfn_uvb_week2,
       main = "UVB+SFN vs UVB at Week 2",
       alpha = 0.1)
graphics.off()
# Genes significant (adjusted p-value < 0.1) in each Week 2 comparison.
# which() drops the NA adjusted p-values up front, so no NA gene names are
# carried along; rownames() is used instead of reading the S4 slot directly
lgene.w2.con <- unique(rownames(res_con_uvb_week2)[which(res_con_uvb_week2$padj < 0.1)])
lgene.w2.sfn <- unique(rownames(res_sfn_uvb_week2)[which(res_sfn_uvb_week2$padj < 0.1)])

# Genes significant in both comparisons, in the order of the CON vs. UVB list
lgene.w2 <- intersect(lgene.w2.con,
                      lgene.w2.sfn)
lgene.w2
[1] "Utrn" "Stom" "Tesc" "Cited4" "Cdhr1" "Slc7a11" "Mki67" "Cyp26b1"
[9] "Smc2" "Mad2l1" "Slc4a7" "Ankrd23" "Ifitm3" "Etv3" "Pla2g4d" "Fetub"
[17] "Kif11" "Ccl6" "Has3" "Il19" "A4galt" "Otud1" "Msn" "Nqo1"
[25] "Dbf4" "Cblb" "Tbc1d24" "Elmo2" "Cd163" "Esd" "Rfx2" "Gsta1"
[33] "Slurp1" "Arntl2" "Vldlr" "Tmem173" "Gpx2" "Slfn9" "Adh7" "Sprr2i"
[41] "Bcl2l15"
Plot of DESeq-normalized counts of genes significant in both comparisons at Week 2:
# Get the DESeq-normalized counts, one table per gene, stacked into one.
# lapply() over the gene list (instead of 1:length()) also behaves correctly
# when the list is empty
dp1 <- rbindlist(lapply(lgene.w2,
                        function(gene) {
                          out <- plotCounts(dds,
                                            gene = gene,
                                            intgroup = c("trt",
                                                         "time"),
                                            returnData = TRUE)
                          data.table(Geneid = gene,
                                     Sample = rownames(out),
                                     out)
                        }))

# Display order of treatments and readable week labels
dp1$trt <- factor(dp1$trt,
                  levels = c("CON",
                             "UVB",
                             "SFN"))
dp1$time <- factor(dp1$time,
                   levels = c("02w",
                              "15w",
                              "25w"),
                   labels = c("Week 2",
                              "Week 15",
                              "Week 25"))
# Keep the genes in the order of the significance list
dp1$Geneid <- factor(dp1$Geneid,
                     levels = lgene.w2)

# Mean normalized count per gene, treatment and week (added by reference)
dp1[, mu := mean(count,
                 na.rm = TRUE),
    by = c("Geneid",
           "trt",
           "time")]

# One row per gene/treatment/week with its mean
dmu <- unique(dp1[, -c("Sample",
                       "count")])
head(dmu)
List of 1
$ axis.text.x:List of 11
..$ family : NULL
..$ face : NULL
..$ colour : NULL
..$ size : NULL
..$ hjust : num 1
..$ vjust : NULL
..$ angle : num 45
..$ lineheight : NULL
..$ margin : NULL
..$ debug : NULL
..$ inherit.blank: logi FALSE
..- attr(*, "class")= chr [1:2] "element_text" "element"
- attr(*, "class")= chr [1:2] "theme" "gg"
- attr(*, "complete")= logi FALSE
- attr(*, "validate")= logi TRUE
List of 1
$ axis.text.x:List of 11
..$ family : NULL
..$ face : NULL
..$ colour : NULL
..$ size : NULL
..$ hjust : num 1
..$ vjust : NULL
..$ angle : num 45
..$ lineheight : NULL
..$ margin : NULL
..$ debug : NULL
..$ inherit.blank: logi FALSE
..- attr(*, "class")= chr [1:2] "element_text" "element"
- attr(*, "class")= chr [1:2] "theme" "gg"
- attr(*, "complete")= logi FALSE
- attr(*, "validate")= logi TRUE
# Restrict the counts and the group means to the genes flagged 'up.dn'.
# NOTE(review): 'dmu.w2' and its 'up.dn' flag are defined outside this
# section; presumably a logical marker of the expression pattern - confirm
dp1.tmp <- dp1[dp1$Geneid %in% unique(dmu.w2$Geneid[dmu.w2$up.dn]), ]
dmu.tmp <- dmu[dmu$Geneid %in% unique(dmu.w2$Geneid[dmu.w2$up.dn]), ]

# Individual samples as points, group means as lines, one panel per gene.
# 'scales' is spelled out in full instead of relying on partial matching
p1 <- ggplot(dp1.tmp,
             aes(x = time,
                 y = count,
                 group = trt,
                 fill = trt)) +
  facet_wrap(~ Geneid,
             scales = "free_y") +
  geom_point(position = position_dodge(0.5),
             shape = 21,
             size = 5,
             color = "black") +
  geom_line(data = dmu.tmp,
            aes(x = time,
                y = mu,
                group = trt,
                colour = trt),
            position = position_dodge(0.5),
            alpha = 0.5,
            size = 2) +
  scale_x_discrete("") +
  scale_y_continuous("DESeq-Normalized Counts") +
  scale_fill_discrete("Treatment")
print(p1)
# Restrict the counts and the group means to the genes flagged 'dn.up'.
# NOTE(review): 'dmu.w2' and its 'dn.up' flag are defined outside this
# section; presumably a logical marker of the expression pattern - confirm
dp1.tmp <- dp1[dp1$Geneid %in% unique(dmu.w2$Geneid[dmu.w2$dn.up]), ]
dmu.tmp <- dmu[dmu$Geneid %in% unique(dmu.w2$Geneid[dmu.w2$dn.up]), ]

# Individual samples as points, group means as lines, one panel per gene.
# 'scales' is spelled out in full instead of relying on partial matching
p1 <- ggplot(dp1.tmp,
             aes(x = time,
                 y = count,
                 group = trt,
                 fill = trt)) +
  facet_wrap(~ Geneid,
             scales = "free_y") +
  geom_point(position = position_dodge(0.5),
             shape = 21,
             size = 5,
             color = "black") +
  geom_line(data = dmu.tmp,
            aes(x = time,
                y = mu,
                group = trt,
                colour = trt),
            position = position_dodge(0.5),
            alpha = 0.5,
            size = 2) +
  scale_x_discrete("") +
  scale_y_continuous("DESeq-Normalized Counts") +
  scale_fill_discrete("Treatment")
print(p1)
In many of these genes, UVB+SFN moved closer to UVB over time.
Tests if the effect of NOT treating with UVB vs. treating with UVB is different at Week 15 compared to Week 2:
# Interaction coefficient: (CON vs. UVB) at Week 15 relative to Week 2
res_int_con_uvb_week <- results(dds,
                                name = "time15w.trtCON",
                                alpha = 0.1)

# Sort genes by increasing adjusted p-value (NAs go last)
res_int_con_uvb_week <- res_int_con_uvb_week[order(res_int_con_uvb_week$padj), ]
print(res_int_con_uvb_week)
log2 fold change (MLE): time15w.trtCON
Wald test p-value: time15w.trtCON
DataFrame with 17202 rows and 6 columns
baseMean log2FoldChange lfcSE stat
<numeric> <numeric> <numeric> <numeric>
Ces2g 1233.64052107766 1.65919128471462 0.219706072210844 7.55186813008224
Chil4 729.990857182023 -11.1293956873127 1.54858690264879 -7.18680731980641
Tiparp 683.339510901133 1.49955960160452 0.248262587560916 6.04021579061555
Slc25a37 391.064324378349 -1.45460152768479 0.244923011988198 -5.93901534966785
H2-M2 206.94506916379 -1.98012352045144 0.34512732806993 -5.73737099152641
... ... ... ... ...
Gpm6b 6.76909966446477 -3.26887203161146 1.55193921192052 -2.10631447836558
Tlr7 1.11233183040672 0.165798521539309 3.90101579949831 0.0425013714532075
Arhgap6 1.55558065988387 0.701543913331167 2.69733929865055 0.26008738080602
Spry3 2.92590454614356 -0.756574618876826 2.19462355565 -0.344740042969577
Zf12 0.240459283234895 1.53995508412402 7.75773006704587 0.198505886491927
pvalue padj
<numeric> <numeric>
Ces2g 4.29058805762623e-14 5.23408837149824e-10
Chil4 6.63239033912628e-13 4.04542648735007e-09
Tiparp 1.53908254415537e-09 6.25842265205047e-06
Slc25a37 2.8673902141708e-09 8.74482330566741e-06
H2-M2 9.61574727613009e-09 2.34605002043022e-05
... ... ...
Gpm6b 0.0351770444831032 NA
Tlr7 0.966099018478313 NA
Arhgap6 0.794796371714883 NA
Spry3 0.730289811494176 NA
Zf12 0.842649279637729 NA
# Counts of up/down-regulated genes at the alpha = 0.1 set in results()
summary(res_int_con_uvb_week)
out of 17202 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up) : 62, 0.36%
LFC < 0 (down) : 81, 0.47%
outliers [1] : 0, 0%
low counts [2] : 5003, 29%
(mean count < 14)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results
# How many adjusted p-values were less than 0.1?
# (genes with NA adjusted p-values are not counted)
sum(res_int_con_uvb_week$padj < 0.1,
na.rm = TRUE)
[1] 143
# MA plot
# In DESeq2's plotMA(), 'alpha' is the adjusted p-value threshold used to
# highlight genes, not a transparency. Use the same 0.1 cutoff that was
# passed to results() for this term.
# plotMA() draws as a side effect; wrapping it in print() only printed its
# NULL return value, so it is called directly.
plotMA(res_int_con_uvb_week,
       main = "(Control vs. UVB) x TIme Interaction",
       alpha = 0.1)
Tests if the effect of treating with UVB+SFN vs. treating with UVB is different at Week 15 compared to Week 2:
# Interaction coefficient: (UVB+SFN vs. UVB) at Week 15 relative to Week 2
res_int_sfn_uvb_week <- results(dds,
                                name = "time15w.trtSFN",
                                alpha = 0.1)

# Sort genes by increasing adjusted p-value (NAs go last)
res_int_sfn_uvb_week <- res_int_sfn_uvb_week[order(res_int_sfn_uvb_week$padj), ]
print(res_int_sfn_uvb_week)
log2 fold change (MLE): time15w.trtSFN
Wald test p-value: time15w.trtSFN
DataFrame with 17202 rows and 6 columns
baseMean log2FoldChange lfcSE
<numeric> <numeric> <numeric>
Sprr2i 160.426257994504 2.83987384043744 0.455041717539916
Jakmip2 63.5056363214658 3.21303587147397 0.539789109889979
Ankrd37 235.286753079087 1.69690697674512 0.334405230598706
Rabgap1l 952.654178700566 0.885094830670513 0.193655876057218
Xdh 997.089593301958 1.17874278667067 0.254355414980077
... ... ... ...
Tex13 0.257950623188226 -0.171416964885152 6.80505378756029
Trpc5os 0.25226659796839 -1.67756077298105 6.85574125799013
Gm6568 0.246930247155146 1.31275277456817 6.92124043267287
Rs1 0.304746600897428 -2.68634965578062 5.6142429285098
Zf12 0.240459283234895 -1.67793188223364 6.93607854502649
stat pvalue padj
<numeric> <numeric> <numeric>
Sprr2i 6.24090875841143 4.35035967805996e-10 7.33818670495154e-06
Jakmip2 5.95239105903574 2.64253030673362e-09 2.22871006069914e-05
Ankrd37 5.07440321345167 3.8871402035267e-07 0.00218560936510295
Rabgap1l 4.5704517140962 4.86674050843135e-06 0.0139492184264559
Xdh 4.63423507914307 3.58259699502912e-06 0.0139492184264559
... ... ... ...
Tex13 -0.0251896561344605 0.979903687548783 NA
Trpc5os -0.244694294876708 0.806693145810482 NA
Gm6568 0.189670159177119 0.849567605832819 NA
Rs1 -0.478488318013282 0.632302686934451 NA
Zf12 -0.241913621845704 0.808847094876152 NA
# Counts of up/down-regulated genes at the alpha = 0.1 set in results()
summary(res_int_sfn_uvb_week)
out of 17202 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up) : 11, 0.064%
LFC < 0 (down) : 2, 0.012%
outliers [1] : 0, 0%
low counts [2] : 334, 1.9%
(mean count < 0)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results
# How many adjusted p-values were less than 0.1?
# (genes with NA adjusted p-values are not counted)
sum(res_int_sfn_uvb_week$padj < 0.1,
na.rm = TRUE)
[1] 13
# MA plot
# plotMA() draws as a side effect; wrapping it in print() only printed its
# NULL return value, so it is called directly. With no 'alpha' given, the
# plot uses the threshold stored by results() (0.1 here).
plotMA(res_int_sfn_uvb_week)

# NOTE: this is NOT the same as calling results(dds) without 'name' or
# 'contrast'. That call reports the last coefficient in resultsNames(dds),
# which here is "time25w.trtSFN" (Week 25 vs. Week 2), not "time15w.trtSFN":
# res <- results(dds,
#                alpha = 0.05)
# res <- res[order(res$padj, decreasing = FALSE),]
# res
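NOTE: By default, results(dds) returns the results for the last coefficient of the last term. Here that is time25w.trtSFN, i.e. the interaction term SFN vs. UVB at Week 25 vs. Week 2, so the Week 15 vs. Week 2 interaction tested above has to be requested explicitly by name (time15w.trtSFN).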
# Genes with a significant (adjusted p-value < 0.1) interaction term in each
# comparison. which() drops the NA adjusted p-values up front, so no NA gene
# names are carried along; rownames() is used instead of reading the S4 slot
lgene.con <- unique(rownames(res_int_con_uvb_week)[which(res_int_con_uvb_week$padj < 0.1)])
lgene.sfn <- unique(rownames(res_int_sfn_uvb_week)[which(res_int_sfn_uvb_week$padj < 0.1)])

# Genes significant in both comparisons, in the order of the CON vs. UVB list
lgene <- intersect(lgene.con,
                   lgene.sfn)
lgene
[1] "Jakmip2" "Rabgap1l" "Alox8" "Xdh"
Plot of DESeq-normalized counts of genes with smallest adjusted p-value for the interaction term:
# Get the DESeq-normalized counts, one table per gene, stacked into one.
# lapply() over the gene list (instead of 1:length()) also behaves correctly
# when the list is empty
dp1 <- rbindlist(lapply(lgene,
                        function(gene) {
                          out <- plotCounts(dds,
                                            gene = gene,
                                            intgroup = c("trt",
                                                         "time"),
                                            returnData = TRUE)
                          data.table(Geneid = gene,
                                     Sample = rownames(out),
                                     out)
                        }))

# Display order of treatments and readable week labels
dp1$trt <- factor(dp1$trt,
                  levels = c("CON",
                             "UVB",
                             "SFN"))
# plotCounts() returns every sample, including Week 25. All three weeks are
# listed so that the "25w" samples are not turned into NA and plotted as an
# "NA" category; this also matches the TPM plot of the same genes
dp1$time <- factor(dp1$time,
                   levels = c("02w",
                              "15w",
                              "25w"),
                   labels = c("Week 2",
                              "Week 15",
                              "Week 25"))
# Keep the genes in the order of the significance list
dp1$Geneid <- factor(dp1$Geneid,
                     levels = lgene)

# Mean normalized count per gene, treatment and week (added by reference)
dp1[, mu := mean(count,
                 na.rm = TRUE),
    by = c("Geneid",
           "trt",
           "time")]

# One row per gene/treatment/week with its mean
dmu <- unique(dp1[, -c("Sample",
                       "count")])

# Individual samples as points, group means as lines, one panel per gene.
# 'scales' is spelled out in full instead of relying on partial matching
p1 <- ggplot(dp1,
             aes(x = time,
                 y = count,
                 group = trt,
                 fill = trt)) +
  facet_wrap(~ Geneid,
             scales = "free_y") +
  geom_point(position = position_dodge(0.5),
             shape = 21,
             size = 5,
             color = "black") +
  geom_line(data = dmu,
            aes(x = time,
                y = mu,
                group = trt,
                colour = trt),
            position = position_dodge(0.5),
            alpha = 0.5,
            size = 2) +
  scale_x_discrete("") +
  scale_y_continuous("DESeq-Normalized Counts") +
  scale_fill_discrete("Treatment")
print(p1)
Compare to the plot of TPM-normalized counts of genes with smallest adjusted p-value for the interaction term:
# TPM values of the genes in 'lgene', kept in the order of that list
tmp <- tpm[Geneid %in% lgene, ]
tmp$Geneid <- factor(tmp$Geneid,
                     levels = lgene)

# Long format: column 1 is the ID, columns 3 onward are the samples
tmp <- melt.data.table(data = tmp,
                       id.vars = 1,
                       measure.vars = 3:ncol(tmp),
                       variable.name = "Sample",
                       value.name = "TPM")

# Attach the week and treatment labels of each sample
tmp <- merge(x = dmeta,
             y = tmp,
             by = "Sample")

# One panel per gene, samples dodged by treatment within each week
p1 <- ggplot(tmp,
             aes(x = Week,
                 y = TPM,
                 group = Treatment,
                 fill = Treatment)) +
  facet_wrap(~ Geneid,
             scales = "free_y") +
  geom_point(shape = 21,
             size = 5,
             color = "black",
             position = position_dodge(0.5)) +
  scale_x_discrete("")
print(p1)
# Record the R version and the attached package versions for reproducibility
sessionInfo()